The Atari Compendium

home *** CD-ROM | disk | FTP | other *** search

/ The Atari Compendium / The Atari Compendium (Toad Computers) (1994).iso / files / prgtools / gnustuff / tos / futils / futils~1 / src / misc1s.zoo / misc1 / combine / main.c < prev next >

Wrap

C/C++ Source or Header | 1991-10-02 | 27.9 KB | 1,138 lines

#include <ctype.h> #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include "util.h" #include "combine.h" /* * main: Main program for the COMBINE utility * * This routine is the driver for the utility. * * Return value: * This procedure has no return value. */ void main (argc, argv) int argc; /* command line argument count */ char **argv; /* command line arguments */ { struct stat stat_buf;/* Buf. to find last written date */ /* * Execute program phases. */ if (!isatty (fileno (stdout))) { fstat (fileno (stdout), &stat_buf); setvbuf (stdout, mem_alloc (stat_buf.st_blksize), _IOFBF, stat_buf.st_blksize); } init (argc, argv); /* Perform program initialization */ if (p1_debug || pa_debug) { fputs ("Start Pass1\n", stderr); } pass1 (); /* Read files building symbol table and record arrays. */ if (p1_debug || pa_debug) { dump_sym_tab ("Pass1 symbol table"); dump_arrays ("Pass1 arrays"); } if (p2_debug || pa_debug) { fputs ("Start Pass2\n", stderr); } pass2 (); /* Determine anchor points in files. */ if (p2_debug || pa_debug) { dump_arrays ("Pass2 arrays"); } if (p3_debug || pa_debug) { fputs ("Start Pass3\n", stderr); } pass3 (); /* Expand anchors to non-unique lines. */ if (p3_debug || pa_debug) { dump_arrays ("Pass3 arrays"); } if (p4_debug || pa_debug) { fputs ("Start Pass4\n", stderr); } pass4 (); /* Fix non-uniques surrounded by insertions */ if (p4_debug || pa_debug) { dump_arrays ("Pass4 arrays"); } if (p5_debug || pa_debug) { fputs ("Start Pass5\n", stderr); } pass5 (); /* Write output files. */ if (statistics_flag) { dump_statistics (); } if (old_new1_change_count == 0 && (file_count == 2 || (old_new2_change_count == 0 && new1_new2_change_count == 0))) { exit (0); } else { exit (1); } } /* * dump_arrays: dump arrays for debugging purposes * * This routine outputs the record arrays to the standard output file. * * Return value: * This procedure has no return value. */ void dump_arrays (message) char *message; /* input */ /* Message to print before arrays */ { int i; /* Misc. variable */ int index; /* Index into record array */ int files_left; /* number of files left to do */ bool file_done[MAX_FILE_COUNT];/* TRUE if EOT reached on file */ record_type * record_ptr;/* Pointer to current record */ /* * Initialize completion parameters. */ printf ("%s\n", message); files_left = file_count; for (i = 0; i < file_count; ++i) { file_done[i] = FALSE; } /* * For each iteration of the file read the nth record of the each file. */ for (index = BEGIN_INDEX + 1; files_left != 0; ++index) { printf ("record: %5d ", index); /* * Handle each file. */ for (i = 0; i < file_count; ++i) { if (!file_done[i]) { if (index >= files[i].record_array_size - 1) { file_done[i] = TRUE; --files_left; if (files_left == 0) { break; } printf ("%38.38s", " "); continue; } record_ptr = &(files[i].record[index]); printf (" rfa:%6d val1:%6d val2:%6d", record_ptr -> rfa, record_ptr -> value[0], record_ptr -> value[1]); } else { printf ("%38.38s", " "); } } printf ("\n"); } } /* * dump_statistics: Dump statistics * * This routine outputs the execution statistics * to the standard output * file. * * Return value: * This procedure has no return value. */ void dump_statistics () { int i; /* Misc. variable */ /* * Initialize completion parameters. */ printf ("\fStatistics:\n\n"); printf ("Cache misses: %d\n", cache_miss); printf ("Hash collisions: %d\n", hash_collisions); printf ("Line counts:\n"); for (i = 0; i < file_count; ++i) { printf (" %5d: '%s'\n", files[i].record_array_size - DUMMY_RECORD_COUNT, files[i].name_ptr); } printf ("Changes:\n"); printf (" '%s' and '%s' ", files[OLD_FILE].name_ptr, files[NEW1_FILE].name_ptr); if (old_new1_change_count == 0) { printf ("are identical.\n"); } else { printf ("have %d differences.\n", old_new1_change_count); } if (file_count > 2) { printf (" '%s' and '%s' ", files[OLD_FILE].name_ptr, files[NEW2_FILE].name_ptr); if (old_new2_change_count == 0) { printf ("are identical.\n"); } else { printf ("have %d differences.\n", old_new2_change_count); } printf (" '%s' and '%s' ", files[NEW1_FILE].name_ptr, files[NEW2_FILE].name_ptr); if (new1_new2_change_count == 0) { printf ("are identical.\n"); } else { printf ("have %d differences.\n", new1_new2_change_count); } } } /* * dump_sym_tab: dump symbol table for debugging purposes * * This routine outputs the symbol table to the standard output file. * * Return value: * This procedure has no return value. */ void dump_sym_tab (message) char *message; /* input */ /* Message to print before table */ { int i; /* Misc. variable */ /* * Write each used symbol table entry. */ printf ("%s\n", message); for (i = 0; i < sym_tab_size; ++i) { if (sym_tab_cache_ptr[i] != CACHE_FREE_ENTRY) { printf ("hash:%5d old:%5d new1:%5d ", i, files[OLD_FILE].sym_tab_index[i], files[NEW1_FILE].sym_tab_index[i]); if (file_count == 3) { printf ("new2:%5d ", files[NEW2_FILE].sym_tab_index[i]); } if (sym_tab_cache_ptr[i] == (cache_entry_type *) CACHE_NOT_IN_CACHE) { printf ("(record not in cache)"); } else { if (sym_tab_cache_ptr[i] -> hash_code != i) { printf ("(cache_hash_code wrong: %d)", sym_tab_cache_ptr[i]->hash_code); } if (sym_tab_cache_ptr[i] -> record_length < 0) { sym_tab_cache_ptr[i] ->recordp[0] = '\0'; } else { sym_tab_cache_ptr[i] -> recordp[sym_tab_cache_ptr[i] -> record_length] = '\0'; } printf ("cache_record(%d): %s", sym_tab_cache_ptr[i] -> record_length, sym_tab_cache_ptr[i] -> recordp); } printf ("\n"); } } } /* * print_usage: Print program usage and exit with the given status */ void print_usage (status) int status; /* exit value */ { fputs( "\ Usage: combine [-BbHhqs] [-c #,#] [-d flag] [-L #] [-P #] [-p #]\n\ [-1 text] [-2 text] old_file new1_file [new2_file]\n\ Options:\n\ -H Help option -- print this message and exit.\n\ -b Blank compress option -- treat all whitespace as a single space.\n\ -B Blank remove option -- ignore all whitespace.\n\ -c #,# Column specification option -- specify column range to compare.\n\ -d flag Debug options -- specifies how much debug information is to be\n\ output (<flag> should be one of [1-5] to debug pass #n or\n\ `a' to debug all passes).\n\ -h h option -- produces a composite file on standard output\n\ suitable for input into combine2.\n\ -L # Lines option -- specify the number of lines to print on a page\n\ of output. Specifying a length of zero disables pagination\n\ (default page-length is 66 lines).\n\ -P # Prefix option -- specify the number of unchanged lines to output\n\ prior to any group of changed lines (default is 5 lines).\n\ -p # Postfix option -- specify the number of unchanged lines to output\n\ following any group of changed lines (default is 5 lines).\n\ -q Quiet option -- no output is generated if no changes are detected.\n\ -s Statistics option -- print statistics after the comparison.\n\ -1 text New1 file description -- symbolic description of <new1_file>.\n\ -2 text New2 file description -- symbolic description of <new2_file>.\n\ " , stderr ); exit(status); } /* * init: Perform program initialization. * * * This routine interprets the command line and opens the files. * * Return value: * This procedure has no return value. */ void init (argc, argv) int argc; /* argument count from 'main' */ char **argv; /* arguments from 'main' */ { char *basename_ptr = 0;/* basename of files */ int cache_entry_size;/* Number of bytes in a cache entry */ cache_entry_type * cache_ptr;/* Pointer to cache entry */ int different_basenames = 0; /* TRUE if file basenames are different */ int directory_count = 0; /* number of command line arguments which are actually directories */ FILE * dummy_file; /* can't assign to stdin on UNIX */ long etime; /* Current time of day */ int is_directory[MAX_FILE_COUNT]; /* TRUE if file is a directory */ int i; /* Misc. variable */ int j; /* Misc. variable */ int k; /* Misc. variable */ int max_record_len = LINE_LENGTH; /* max initial record length */ int record_count; /* Number of records in record array */ struct stat stat_buf;/* Buf. to find last written date */ char *the_cache; /* Ptr to head of cache */ char *temp_ptr; /* Misc char ptr */ int total_record_count;/* Total number of records in all files */ int c; /* Option character */ extern int optind; /* Option index */ extern char *optarg; /* Option argument pointer */ extern int getopt ();/* getopt routine */ extern char *ctime ();/* convert time routine */ extern char *strrchr ();/* search for character in string */ #ifdef VOS stdout -> carriage_control = TRUE; #endif /* * Scan options arguments. */ (void) time (&etime); (void) strcpy (exec_time, ctime (&etime)); exec_time[strlen (exec_time) - 1] = '\0'; /* remove newline character */ for (;;) { c = getopt (argc, argv, "HbBhsqc:d:p:P:1:2:L:"); if (c == EOF) { break; } switch (c) { /* * H option - print usage and exit */ case 'H': print_usage(0); break; /* * B and b option: Blank remove and blank compress * options. */ case 'b': blank_compress = TRUE; compress_records = TRUE; break; case 'B': blank_remove = TRUE; compress_records = TRUE; break; /* * c option: Compare only specified columns. */ case 'c': compress_records = TRUE; if ((column_count + 1) == (MAX_COLUMNS)) { error ("Too many -c options"); } for (j = 0; isdigit (optarg[j]); ++j) { } if (j == 0) { error ("-c option not followed by number"); } first_column[column_count] = atoi (optarg) - 1; /* Zero relative */ if (first_column[column_count] < 0) { error ("Column specification less than column 1"); } if (optarg[j] != ',') { error ("Column specifications not separated by comma"); } optarg += j + 1; for (j = 0; isdigit (optarg[j]); ++j) { } if (j == 0) { error ("-c option not followed by two numbers"); } last_column[column_count] = atoi (optarg) - 1; /* Zero relative */ if (last_column[column_count] < first_column[column_count]) { error ("Last column spec. less then first column spec."); } max_record_len = max(max_record_len, last_column[column_count] + 1); column_count++; break; /* * D option: Debug. Print debug output. */ case 'd': switch (*optarg) { case 'a': pa_debug = TRUE; break; case '1': p1_debug = TRUE; break; case '2': p2_debug = TRUE; break; case '3': p3_debug = TRUE; break; case '4': p4_debug = TRUE; break; case '5': p5_debug = TRUE; break; default: error ("invalid argument following -d option"); } break; /* * h option: name of file to output HED edit file to */ case 'h': #ifdef VOS stdout -> carriage_control = FALSE; #endif hed_flag = TRUE; break; /* * -P option: Number of prefix lines to output to listing file. * -p option: Number of postfix lines to output to listing file. */ case 'P': prefix_lines = atoi (optarg); if (prefix_lines > CACHE_ENTRIES - 10) { error ("Too many prefix lines"); } break; case 'p': postfix_lines = atoi (optarg); break; /* * -s option: Output page of statistics to stdout */ case 's': statistics_flag = TRUE; break; /* * -1 option: Text string to associate with 'new1' file. * -2 option: Text string to associate with 'new2' file. */ case '1': files[NEW1_FILE].text_ptr = optarg; break; case '2': files[NEW2_FILE].text_ptr = optarg; break; /* * Q option: Quiet. Produce no output if no differences. */ case 'q': quiet_option = TRUE; break; /* * L option: specify #lines/page for output listing */ case 'L': page_length = atoi (optarg); if ( page_length < 0 ) page_length = PAGE_LENGTH; if ( page_length && (page_length - HEAD_LENGTH) < 0 ) page_length += HEAD_LENGTH; break; default: print_usage(2); break; } } /* * Handle each command line argument. */ for (i = optind; i < argc; ++i) { #ifdef VOS /* * Handle redirections of 'stdin': * * This code won't get executed on a UNIX O.S. However, * on VOS this code allows the same syntax to work. */ if (argv[i][0] == '<' && argv[i][1] != '\0') { dummy_file = freopen (&argv[i][1], "r", stdin); if (dummy_file == 0) { perror(&argv[i][1]); exit( 2 ) ; } /* * Handle redirections of 'stdout': * * This code won't get executed on a UNIX O.S. However, on VOS this * code allows the same syntax to work. */ } else if (argv[i][0] == '>' && argv[i][1] != '\0') { dummy_file = freopen (&argv[i][1], "w", stdout); if (dummy_file == 0) { perror(&argv[i][1]); exit(2); } /* * Handle file arguments not preceeded by a specific option argument. */ } else { #endif if (file_count >= MAX_FILE_COUNT) { error ("Too many files specified"); } files[file_count].name_ptr = argv[i]; stat (files[file_count].name_ptr, &stat_buf); is_directory[file_count] = (stat_buf.st_mode & S_IFMT) == S_IFDIR; if (is_directory[file_count]) { directory_count++; } else { temp_ptr = strrchr (argv[i], '/'); if (temp_ptr == 0) { temp_ptr = argv[i]; } if (basename_ptr && strcmp (temp_ptr, basename_ptr) != 0) { different_basenames = 1; } basename_ptr = temp_ptr; } file_count++; #ifdef VOS } #endif } /* * Resolve actual file names and open files. * * The name specified on the command line might be a directory name. */ if (file_count < 2) { error ("not enough files specified"); } if (file_count == directory_count) { error ("cannot compare directories"); } if (directory_count != 0 && file_count - directory_count > 1 && different_basenames) { error ("ambiguous directory name"); } total_record_count = 0; for (i = 0; i < file_count; ++i) { if (is_directory[i]) { temp_ptr = mem_alloc (strlen (files[i].name_ptr) + strlen (basename_ptr) + 2); sprintf (temp_ptr, "%s/%s", files[i].name_ptr, basename_ptr); files[i].name_ptr = temp_ptr; } #ifdef VOS files[i].seq_fd = fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_DB); files[i].rnd_fd = fopen (files[i].name_ptr, "r", max_record_len, "s", $OPEN_RMAI); #else files[i].seq_fd = fopen (files[i].name_ptr, "r"); files[i].rnd_fd = fopen (files[i].name_ptr, "r"); #endif if (files[i].seq_fd == 0 || files[i].rnd_fd == 0) { perror(files[i].name_ptr); exit(2); } fstat (fileno (files[i].seq_fd), &stat_buf); temp_ptr = ctime (&(stat_buf.st_mtime)); temp_ptr[strlen (temp_ptr) - 1] = '\0'; files[i].lw_ptr = mem_alloc (strlen (temp_ptr) + 1); strcpy (files[i].lw_ptr, temp_ptr); setvbuf (files[i].seq_fd, mem_alloc (stat_buf.st_blksize), _IOFBF, stat_buf.st_blksize); setvbuf (files[i].rnd_fd, mem_alloc (stat_buf.st_blksize), _IOFBF, stat_buf.st_blksize); /* estimate record count by assuming 20 chars per record */ /* Don't allow overly small record counts */ record_count = max( stat_buf.st_size / 20, RA_ORIG); files[i].record_array_alloc = record_count; total_record_count += record_count; files[i].record = (record_type *) mem_alloc (record_count * sizeof (record_type)); } /* * Sort column ranges into ascending order. */ for (i = 0; i + 1 < column_count; ++i) { for (j = i + 1; j < column_count; ++j) { if (first_column[i] > first_column[j]) { k = first_column[i]; first_column[i] = first_column[j]; first_column[j] = k; k = last_column[i]; last_column[i] = last_column[j]; last_column[j] = k; } } } /* * Ensure there are no overlapping column ranges. */ for (i = 0; i + 1 < column_count; ++i) { if (last_column[i] >= first_column[i + 1]) { error ("overlaping column ranges specified"); } } /* * Allocate cache entries. * * Cache entries include an extra word at the end of the buffer. * This word allows a word of blanks to be inserted after the end * of each read line. This, in turn, allows hash code computations * and line comparisons to be word oriented rather than byte oriented. * * The cache is allocated in one chunk below for two reasons: * 1) For small files the huge number of allocations consumes * significant time. * 2) Less memory is used since mem_alloc allocates a block * which is larger than is actually requested. (The next larger * power of two.) */ cache_entry_size = sizeof (cache_entry_type) + sizeof (int) + max_record_len; cache_entry_size += sizeof (int) - (cache_entry_size % sizeof (int)); the_cache = mem_alloc (CACHE_ENTRIES * cache_entry_size); for (i = 0; i < CACHE_ENTRIES; ++i) { cache_ptr = (cache_entry_type *) the_cache; cache_ptr -> recordp = the_cache + sizeof(cache_entry_type); cache_ptr -> record_alen = cache_entry_size - sizeof(cache_entry_type); cache_ptr -> hash_code = HASH_FREE_ENTRY; enq_head_dll (cache_head_ptr, cache_tail_ptr, cache_ptr, cache_next_ptr, cache_prev_ptr); the_cache += cache_entry_size; } /* * Compute size of symbol table. * * 1) Initially quess size of symbol table as the sum of the number of * records in all of the input files times 2. * 2) Never allocate a symbol table of less than 1024 entries. (This step * is required due to the organization of the prime number table.) * 3) Round the size down to a multiple of 1024. (This tries to force the * symbol table to be an integer number of pages. It also limits the * size of the prime number table). * 4) Round the size down to a prime number. (The hashing algorithm requires* * that the size of the table is a prime number). */ sym_tab_size = total_record_count * 2; sym_tab_size = max (1024, sym_tab_size); /* Prime number table contains only those primes which are less than and closest to a multiple of 1024 */ for (i = 1; primes[i] != -1; ++i) { if (sym_tab_size < primes[i]) { break; } } sym_tab_size = primes[i - 1]; /* * Allocate symbol table. */ for (i = 0; i < file_count; ++i) { files[i].sym_tab_index = (int *) mem_alloc (sym_tab_size * sizeof (int)); } sym_tab_cache_ptr = (cache_entry_type **) mem_alloc (sym_tab_size * sizeof (cache_entry_type *)); } /* * link_records: link two records together. * * This routine links a record in the current file to a record in the * corresponding file. * * If either of these records are already * linked to a record in the other file, finish up all of the * linkages. Pass5 considers it an inconsistent state if only two of * the three linkages between files are made. Usually, this inconsistent * state will clear itself up. However, certain input files will indeed * allow the inconsistency to remain. * * Note: This routine also discovers an attempt to link records in an * impossible fashion. Suppose, this record in the 'current' file is * already linked to record A in the 'other' file. This record in the * 'corresponding' file is already linked to record B in the 'other' file. * Any attempt to link the current and corresponding records would * require that record A and record B be the same record (impossible). * In that circumstance, this routine acts as a no-op. The calling * routine is not informed since this new information wouldn't change the * decision making process which it is going through. * * Return value: * This procedure has no return value. */ void link_records (match_no, index1, index2) int match_no; /* input */ /* Which relationship is being scanned */ int index1; /* Index into the current file of the record to link. */ int index2; /* Index into the corresponding file of the record to link. */ { file_type * file1_ptr; /* First file - current_file */ file_type * file2_ptr; /* Second file - corresponding file */ file_type * file3_ptr; /* Third file - other file */ int file1_sub; /* For each record of the first file, this is a subscript of the 'value' array of the relationship between file1 and file2 */ int file2_sub; /* For each record of the second file, this is a subscript of the 'value' array of the relationship between file2 and file1 */ int file3_sub; /* For each record of the third file, this is a subscript of the 'value' array of the relationship between file3 and file1 */ int hash_code; /* Hash code for the record being linked. */ int index3; /* Index into record array of file3 is the 'next' record in file3 */ int *other_val1_ptr; /* Pointer to the 'value' field in the record on file1. This is the 'value' which indicates the relationship to file3. */ int *other_val2_ptr; /* Pointer to the 'value' field in the record on file2. This is the 'value' which indicates the relationship to file3. */ int *val1_ptr; /* Pointer to the 'value' field in record on file1. This is the 'value' which indicates the relationship to file2. */ int *val2_ptr; /* Pointer to the 'value' field in record on file2. This is the 'value' which indicates the relationship to file1. */ int *val3_ptr; /* Pointer to the 'value' field in record on file3. */ /* * Set up misc local variables. */ if (p3_debug || p4_debug) { printf ("link_records: matchno: %d indices: %d %d\n", match_no, index1, index2); } file1_ptr = &files[curr_file[match_no]]; file2_ptr = &files[corres_file[match_no]]; file1_sub = value_sub[match_no]; file2_sub = rev_value_sub[match_no]; /* * Link the two records together. */ val1_ptr = &(file1_ptr -> record[index1].value[file1_sub]); val2_ptr = &(file2_ptr -> record[index2].value[file2_sub]); hash_code = *val1_ptr; *val1_ptr = index2; *val2_ptr = index1; /* * If either of these two records are already linked to the third file, * connect these two record to the record in the third file. */ other_val1_ptr = &(file1_ptr -> record[index1].value[other_sub (file1_sub)]); other_val2_ptr = &(file2_ptr -> record[index2].value[other_sub (file2_sub)]); if (is_hash_code (*other_val1_ptr)) { if (*other_val1_ptr != hash_code) { error ("hash code mis-match 1"); } if (is_hash_code (*other_val2_ptr)) { if (*other_val2_ptr != hash_code) { error ("hash code mis-match 2"); } return; } else { index3 = *other_val2_ptr; *other_val1_ptr = index3; } } else { index3 = *other_val1_ptr; if (is_hash_code (*other_val2_ptr)) { if (*other_val2_ptr != hash_code) { error ("hash code mis-match 3"); } *other_val2_ptr = index3; } else { if (*other_val1_ptr != *other_val2_ptr) { /* error( "other file index mismatch 1" ) ; */ /* In this error condition, just undo what we've already done */ *val1_ptr = hash_code; *val2_ptr = hash_code; return; } } } /* * Connect the record in the third file to the record in the first file. */ file3_ptr = &files[other_file[match_no]]; file3_sub = other_value_sub[match_no]; val3_ptr = &(file3_ptr -> record[index3].value[file3_sub]); if (is_hash_code (*val3_ptr)) { if (*val3_ptr != hash_code) { error ("hash code mis-match 4"); } *val3_ptr = index1; } else { if (*val3_ptr != index1) { error ("other file index mismatch 2"); } } /* * Connect the record in the third file to the record in the second file. */ val3_ptr = &(file3_ptr -> record[index3].value[other_sub (file3_sub)]); if (is_hash_code (*val3_ptr)) { if (*val3_ptr != hash_code) { error ("hash code mis-match 5"); } *val3_ptr = index2; } else { if (*val3_ptr != index2) { error ("other file index mismatch 3"); } } } /* * error: output fatal error message * * This routine outputs an error message and terminates. * * Return value: * This procedure has no return value. */ void error (error_ptr) char *error_ptr; /* input */ /* Record to output. */ { fprintf (stderr, "combine: %s.\n", error_ptr); exit (2); } /* * mem_alloc: allocate memory * * This routine uses the standard memory allocator, heowever, if memory * is not available, this routine outputs an error message and terminates. * * Return value: * This procedure returns a pointer to the allocated block. */ char *mem_alloc (size) int size; /* input */ /* Size (in bytes) of the block to allocate */ { char *block_ptr; /* Misc. variable */ extern char *malloc (); block_ptr = malloc (size); if (block_ptr == 0) { error ("not enough memory -- files too big"); } return (block_ptr); } /* * reread_into_cache -- re-read a record from a file into a cache entry * * This routine is used to re-read a record (which has previously been * read) into a cache entry. */ void reread_into_cache( file_ptr, index, cache_ptr ) file_type * file_ptr; /* file to be read from */ int index; /* record number to read */ cache_entry_type * cache_ptr; /* cache entry to read into */ { int status; char mbuffer[LINE_LENGTH]; status = fseek (file_ptr->rnd_fd, file_ptr->record[index].rfa, 0); if ( status == -1 ) { (void) sprintf (mbuffer, "Disk error while seeking '%s'", file_ptr -> name_ptr); error (mbuffer); } status = read_into_cache(file_ptr->rnd_fd, file_ptr->record[index].rfa, cache_ptr); if (status < 0) { (void) sprintf (mbuffer, "Disk error while re-reading '%s'", file_ptr -> name_ptr); error (mbuffer); } } /* * read_into_cache -- read a record from a file into a cache entry * * Read a record into a cache entry. This routine reads an entire record * into the cache entry. If the currently allocated buffer is too small, * a larger buffer will be allocated. * * Return Value: * Byte count read (-1 for EOF) */ int read_into_cache( fp, rfa, cache_ptr) FILE *fp; /* File to read */ rfa_type rfa; /* rfa to read (already positioned) */ cache_entry_type * cache_ptr; /* cache entry to read into */ { char c; char *char_ptr; int status; int i; char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen-sizeof(int), fp); if (char_ptr == NULL) return (-1); i = strlen (cache_ptr->recordp) - 1; if (cache_ptr->recordp[i] != '\n') { status = fseek (fp, rfa, 0); if ( status == -1 ) error("Internal error: cannot reseek"); for (i=0;;i++) { c = getc (fp); if (feof (fp)) { /* not (c==EOF) because of binary files */ break; /* This is sort of a kludge, we only check for non-ascii if the record length is too long */ } else if (!isascii (c) || c == '\0' ) { error ("non-ascii character in file"); } else if (c == '\n') { break; } } i+=2; /* Leave room from newline and null byte */ i+=sizeof(int); /* leave space at end for extra nulls for checksum algorithm */ i += sizeof (int) - (i % sizeof (int)); /* * Don't deallocate the old buffer since it was probably * allocated as a part of a larger buffer. */ cache_ptr->recordp = mem_alloc(i); cache_ptr->record_alen = i; status = fseek (fp, rfa, 0); if ( status == -1 ) error("Internal error: cannot reseek"); char_ptr = fgets (cache_ptr->recordp, cache_ptr->record_alen-sizeof(int), fp); if (char_ptr == NULL) return (-1); i = strlen (cache_ptr->recordp) - 1; /* Perhaps we should warn about this */ if (cache_ptr->recordp[i] != '\n') i++; } cache_ptr->recordp[i] = '\0'; cache_ptr->record_length = i; return (i); }